import urllib.request as ur
import lxml.etree as le
import user_agent


'''
Helper that creates the HTTP request object.
'''
def getRequest(url):
    """Build a ``urllib.request.Request`` for *url* with browser-like headers.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        A ``urllib.request.Request`` carrying a ``User-Agent`` header taken
        from ``user_agent.get_user_agent_pc()`` and a fixed ``Cookie`` header.
    """
    # NOTE(review): the cookie below embeds session tokens (JSESSIONID,
    # UserToken, ...) directly in source; consider loading it from
    # configuration or the environment instead.
    return ur.Request(
        url=url,
        headers={
            # The HTTP request header names are 'User-Agent' and 'Cookie';
            # 'User_Agent' / 'Cookies' are not headers a server looks at.
            'User-Agent': user_agent.get_user_agent_pc(),
            'Cookie': 'TY_SESSION_ID=a10a4b65-de90-4a8a-9206-8d9627be957e; JSESSIONID=62CFE8FF67EB70A0DAE5FA339E1B3FC0; uuid_tt_dd=10_36832054360-1516582551080-431999; ARK_ID=JS5ebee742bfd5f8a7ceaa734f048b9fe35ebe; _ga=GA1.2.795091608.1542351168; smidV2=201812141508172cadee73f3736e9bbdec1a81e5f736bd002b2a0dcb0fe8450; __yadk_uid=H7RFrwiqAyPDS98m581yaqVEt9KuTu2w; UM_distinctid=169800351632a6-014200b75daf85-4313362-144000-169800351651169; dc_session_id=10_1560126867411.936030; UserName=qq_35408086; UserInfo=f575835def074b56ac8183fb12729e76; UserToken=f575835def074b56ac8183fb12729e76; UserNick=qq_35408086; AU=884; UN=qq_35408086; BT=1563801032095; p_uid=U000000; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=1788*1*PC_VC!5744*1*qq_35408086!6525*1*10_36832054360-1516582551080-431999; acw_tc=2760822215638464155398922e230fbc6cecaa50a8c583ce96408c48142f1e; Hm_lvt_eb5e3324020df43e5f9be265a8beb7fd=1566182075; Hm_ct_eb5e3324020df43e5f9be265a8beb7fd=5744*1*qq_35408086!6525*1*10_36832054360-1516582551080-431999; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1566282847,1566282866,1566282874,1566282885; dc_tos=pwiwoe; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1566284271',
        },
    )

def getProxyAddr(apiUrl=''):
    """Fetch a proxy address and return an opener that routes through it.

    Args:
        apiUrl: URL whose response body is the proxy address as plain text
            (presumably ``host:port`` -- confirm against the proxy provider).
            The default is empty, so a real URL must be supplied before this
            function can succeed.

    Returns:
        A ``urllib.request.OpenerDirector`` whose ``ProxyHandler`` maps both
        ``http`` and ``https`` to the fetched address.

    Raises:
        ValueError: If ``apiUrl`` is empty or the API returns a blank body.
    """
    if not apiUrl:
        # urlopen('') would raise an opaque "unknown url type" ValueError;
        # fail with a message that says what is actually missing.
        raise ValueError('proxy API URL is not configured')
    # Close the API response as soon as the address has been read.
    with ur.urlopen(apiUrl) as apiResponse:
        proxyAddr = apiResponse.read().decode().strip()
    if not proxyAddr:
        raise ValueError('proxy API returned an empty address: %s' % apiUrl)
    # Map https as well as http: a ProxyHandler only proxies the schemes it
    # is given, and the pages fetched by this script are https:// URLs.
    proxyHandler = ur.ProxyHandler(
        {
            'http': proxyAddr,
            'https': proxyAddr,
        }
    )
    return ur.build_opener(proxyHandler)
def _safeFileName(titleParts, fallback):
    """Return a file-system-safe base name built from xpath title results.

    Args:
        titleParts: Sequence of text fragments (an xpath ``text()`` result).
        fallback: Text to use when the fragments yield an empty name.

    Returns:
        The joined, stripped title with path separators, characters that are
        reserved on common file systems, and control characters replaced by
        ``_``; the scrubbed ``fallback`` if that is empty; else ``'untitled'``.
    """
    illegal = '\\/:*?"<>|'

    def scrub(text):
        return ''.join(
            '_' if ch in illegal or ch < ' ' else ch for ch in text.strip()
        )

    return scrub(''.join(titleParts)) or scrub(fallback) or 'untitled'


# Search-result pages to crawl (inclusive range) and the search keyword.
pn_start = 1
pn_end = 3
keyword = 'python'
for pn in range(pn_start, pn_end + 1):
    try:
        request = getRequest(
            'https://so.csdn.net/so/search/s.do?p=%s&q=%s&t=blog&domain=&o=&s=&u=&l=&f=&rbg=0' % (pn, keyword)
        )
        # A fresh opener (and therefore a fresh proxy lookup) per request.
        with getProxyAddr().open(request) as search_page:
            response = search_page.read()
        href_s = le.HTML(response).xpath('//span[@class="down fr"]/../span[@class="link"]/a/@href')
    except Exception as e:
        # Report the failure instead of silently swallowing it, then move on
        # to the next result page (best-effort crawl).
        print(e)
        continue
    for href in href_s:
        try:
            with getProxyAddr().open(getRequest(href)) as blog_page:
                response_blog = blog_page.read()
            # xpath() returns a list of text nodes, not a single string.
            title = le.HTML(response_blog).xpath('//h1[@class="title-article"]/text()')
            # Fall back to the last URL path segment when no title is found.
            file_name = _safeFileName(title, href.rstrip('/').rsplit('/', 1)[-1])
            # read() yields bytes, so write in binary mode; a text-mode
            # write() of bytes raises TypeError.
            with open('%s.html' % file_name, 'wb') as f:
                f.write(response_blog)
        except Exception as e:
            print(e)


